
#Object files that are linked into the executable.
#Only the main file (compute.o) and the modules that are
#*separately compiled* have to be listed here; everything else
#is automatically included in the compilation.

#Host-side modules
hostobjs = \
    initutils.o \
    io.o \
    slice.o \
    model_collectiveops.o

#Device-side modules
deviceobjs = \
    collectiveops.o \
    diagnostics.o \
    integrators.o \
    boundcond.o \
    timestep.o \
    diff.o \
    shear.o \
    coriolis.o \
    forcing.o \
    continuity.o \
    hydro.o

#Main file
main = compute.o

#Flags that can be added to CFLAGS:
#
#-g -G compiles debuggable code
#
#-O2 -use_fast_math uses all optimizations and replaces
#math functions with less precise, but faster versions
#
#--maxrregcount=64 sets the maximum number of registers used
#by a CUDA thread. A low register count increases occupancy
#in register-limited programs, but causes register spilling
#to L1.
#
#-Xptxas -dlcm=ca caches global memory accesses in L1, but
#may cause overfetch because transactions from L1 are done
#in 128-byte cache lines, whereas transactions from L2/SGRAM
#are done as 32-byte blocks.
#
#NOTE: none of the optional flags described above are enabled
#at the moment; CFLAGS only passes the -gencode option below.
CFLAGS = -gencode arch=compute_35,code=sm_35

#Prerequisites that are not found in the current directory are
#also searched for in verification/ and in the parent directory (..)
VPATH = verification:..

#Include initial configuration
#(presumably defines INIT_CONFS, which is passed to nvcc by the
#compile rules in this file - TODO confirm)
include init.conf

#Include run configuration
#(presumably defines RUN_CONFS, which is passed to nvcc by the
#compile rules in this file - TODO confirm)
include run.conf

#Include macro flags and their user defined values
#(presumably defines MFLAGS, which is passed to nvcc by the
#device compile rule - TODO confirm)
#NOTE: these are plain "include" directives, so make stops with an
#error if any of the three files is missing and cannot be remade.
include makefile.local

#-rdc=true?
#Default goal: build the ac_run executable.
#"all" is only a name, not a file, so it is declared phony and the
#link step lives in a rule for the real output file. This way the
#executable is relinked only when one of its objects is newer.
.PHONY: all
all: ac_run

#Link the host, device and main objects into the executable.
ac_run: $(hostobjs) $(deviceobjs) $(main)
	nvcc $(CFLAGS) $(hostobjs) $(deviceobjs) $(main) -o $@

#Convenience target that builds all host objects. Declared phony
#so that a stray file called "hostobjs" cannot mask it.
.PHONY: hostobjs
hostobjs: $(hostobjs)

#Compile a .cpp source into a relocatable object (-dc), passing
#the initial and run configuration flags.
%.o: %.cpp
	nvcc ${INIT_CONFS} ${RUN_CONFS} $(CFLAGS) -dc $< -o $@


#Convenience target that builds all device objects. Declared phony
#so that a stray file called "deviceobjs" cannot mask it.
.PHONY: deviceobjs
deviceobjs: $(deviceobjs)

#Compile the device modules with the user-defined macro flags.
#This is a static pattern rule limited to $(deviceobjs): GNU Make
#keeps only the last of several pattern rules that share the same
#target and prerequisite patterns, so a generic "%.o: %.cu" rule
#carrying MFLAGS would be silently replaced by any later
#"%.o: %.cu" rule and MFLAGS would never reach the compiler.
$(deviceobjs): %.o: %.cu
	nvcc ${MFLAGS} $(CFLAGS) -dc $< -o $@


#Convenience target that builds the main object. Declared phony
#so that a stray file called "main" cannot mask it.
.PHONY: main
main: $(main)

#Compile the main file with the initial and run configuration flags.
#This is a static pattern rule limited to $(main): a generic
#"%.o: %.cu" rule at this point would replace any earlier rule with
#the same patterns and force every .cu file to be compiled with
#these flags instead of its own.
$(main): %.o: %.cu
	nvcc ${INIT_CONFS} ${RUN_CONFS} $(CFLAGS) -dc $< -o $@




#Remove object files, the executable and the .ani/.ac/.dat files
#under data/.
#Every rm uses -f so that the target succeeds when there is nothing
#to delete. The executable produced by this makefile is ac_run;
#"runme" is still removed as well for trees that contain it.
.PHONY: clean
clean:
	@rm -f *.o
	@rm -f ac_run runme
	@rm -f data/animation/*.ani
	@rm -f data/*.ac
	@rm -f data/*.dat

#Remove everything in data/animation and all non-directory entries
#directly under data/.
#Sub-directories of data/ (such as data/animation itself) are skipped
#explicitly: a plain "rm data/*" reports an error for them and makes
#the target fail. -f keeps empty directories from being an error.
.PHONY: cleandata
cleandata:
	@rm -f data/animation/*
	@for f in data/*; do [ -d "$$f" ] || rm -f "$$f"; done

#Clean, rebuild in parallel and run the executable.
#$(MAKE) is used instead of a literal "make" so that the same make
#program is re-invoked and options such as -n are passed on to the
#sub-makes.
.PHONY: quickrun
quickrun:
	$(MAKE) clean
	$(MAKE) -j
	./ac_run

